% Copyright 2019 by Till Tantau
% Copyright 2019 by Jonathan P. Spratte
%
% This file may be distributed and/or modified
%
% 1. under the LaTeX Project Public License and/or
% 2. under the GNU Free Documentation License.
%
% See the file doc/generic/pgf/licenses/LICENSE for more details.


\section{Parser Module}%
\label{section-module-parser}%

\begin{pgfmodule}{parser}%
    This module defines some commands for creating a simple letter-by-letter
    parser.
\end{pgfmodule}

\begin{package}{pgfparser}
    Because the parser module is almost independent of the rest of \pgfname, it
    can also be used as a standalone package with minimal dependencies.
\end{package}

This module provides commands for defining a parser that scans some given text
letter-by-letter. For each letter, some code is executed and, possibly, a
state switch occurs. The code for each letter might take mandatory or optional
arguments. The parsing process ends when a final state has been reached, and
optionally some code is executed afterwards. Each newly defined parser by
default ignores space tokens; if you want to change that, you'll have to
explicitly define an action for blank spaces (with |\pgfparserdef|).

\begin{command}{\pgfparserparse\marg{parser name}\meta{text}}%
  This command is used to parse the \meta{text} using the (previously defined)
  parser named \meta{parser name}.

  The \meta{text} is not contained in curly braces, rather it is all the text
  that follows. The end of the text is determined implicitly, namely when the
  final state of the parser has been reached. If you defined a final action for
  the parser using |\pgfparserdeffinal| it is executed now.

  The parser works as follows: At any moment, it is in a certain
  \emph{state}; initially, this state is called |initial|. Then, the first
  letter of the \meta{text} is examined (using the |\futurelet| command). For
  each possible state and each possible letter, some action code is stored in
  the parser in a table. This code is then executed. This code may, but need
  not, trigger a \emph{state switch}, causing a new state to be set. The
  parser then moves on to the next character of the text and repeats the
  whole procedure, unless it is in the state |final|, which causes the
  parsing process to stop immediately.

  In the following example, the parser counts the number of |a|'s in the
  \meta{text}, ignoring any |b|'s. The \meta{text} ends with the first~|c|.
  %
\begin{codeexample}[preamble={\usepgfmodule{parser}}]
\newcount\mycount
\pgfparserdef{myparser}{initial}{the letter a}%
{\advance\mycount by 1\relax}%
\pgfparserdef{myparser}{initial}{the letter b}%
{} % do nothing
\pgfparserdef{myparser}{initial}{the letter c}%
{\pgfparserswitch{final}}% done!

\pgfparserparse{myparser}aabaabababbbbbabaabcccc
There are \the\mycount\ a's.
\end{codeexample}%
    %
\end{command}%

\begin{command}%
  {%
    \pgfparserdef\marg{parser name}\marg{state}\meta{symbol meaning}%
    \oarg{arguments}\marg{action}%
  }%
  This command should be used repeatedly to define a parser named
  \meta{parser name}. With a call to this command you specify that the
  \meta{parser name} should do the following: When it is in state
  \meta{state} and reads the letter \meta{symbol meaning}, perform the code
  stored in \meta{action}.

  The \meta{symbol meaning} must be the text that results from applying the
  \TeX\ command |\meaning| to the given character. For instance, |\meaning a|
  yields |the letter a|, while |\meaning 1| yields |the character 1|. A space
  yields |blank space  |. Alternatively you can give the symbol you want without
  surrounding it in braces. So both
  |\pgfparserdef{myparser}{initial}{the letter a}{foo}|
  and
  |\pgfparserdef{myparser}{initial}a{foo}|
  define an \meta{action} for |the letter a|. This short form works for most
  tokens, but not for a space (in which case you can use
  |\pgfparserdef|\allowbreak|{myparser}|\allowbreak|{initial}|\allowbreak
  |{blank space}|\allowbreak|{foo}|%
  ), and opening braces
  (in which case you can use
  |\pgfparserdef|\allowbreak|{myparser}|\allowbreak|{initial}|\allowbreak
  |{\meaning\bgroup}|\allowbreak|{foo}|%
  , and one might prefer
  to use
  |\pgfparserdef|\allowbreak|{myparser}|\allowbreak|{initial}|\allowbreak
  |{\meaning\egroup}|\allowbreak|{foo}|
  for closing braces as well). You can as well define an action for a macro's
  meaning (note that macros with different names can have the same meaning), so
  things like
  |\pgfparserdef|\allowbreak|{myparser}|\allowbreak|{initial}\texttt{foo}|
  are possible as well.

  The \meta{action} might require arguments which you can specify in the
  optional \meta{arguments} string. The argument string can contain up to nine
  argument specifications of the following types:

  \begingroup
  \def\argdesc#1#2%
    {%
      \par
      \begingroup
      \setbox0\hbox{\texttt{#1}}\makebox[1.5em][l]{\usebox0}%
      \ifdim\wd0>1.5em\\\null\hspace{1.5em}\fi
      \parbox[t]{\dimexpr\linewidth-1.5em\relax}{#2}%
      \endgroup
    }%
    \argdesc{m}%
      {a normal mandatory argument}%
    \argdesc{r\meta{delim}}%
      {a mandatory argument which is read up to the \meta{delim}}%
    \argdesc{o}%
      {an optional argument in |[]| defaulting to a special mark}%
    \argdesc{O\marg{default}}%
      {like |o| but defaulting to \meta{default}}%
    \argdesc{d\meta{delim1}\meta{delim2}}%
      {%
        an optional argument in \meta{delim1} and \meta{delim2} defaulting to a
        special mark%
      }%
    \argdesc{D\meta{delim1}\meta{delim2}\marg{default}}%
      {like |d| but defaulting to \meta{default}}%
    \argdesc{t\meta{token}}%
      {%
        tests whether the next letter is \meta{token}, if so gobbles it and
        the argument is set to a special mark.%
      }%
  \endgroup

  So if you want to define an \meta{action} that takes two mandatory arguments
  you use |[mm]|, if it should take an optional star, one optional argument in
  brackets that returns a marker if it's not used, one mandatory and finally an
  optional argument in parentheses that defaults to |something| you use
  |[t*omD(){something}]| as the argument string. If the argument should be
  anything up to a semicolon, you use |[r;]|. Spaces before argument
  specifications in the string are ignored. So |[r m]| will be one argument and
  read anything up to an |m|. Also, spaces before any argument in the parsed
  letters are ignored, so if |a| was set up to take an optional argument, the
  argument would be detected in |a []|. Like with normal \LaTeXe\ optional
  arguments you have to protect nested brackets: |[a[bc]d]| would be read as
  |a[bc| with a trailing |d]|, \emph{not} as |a[bc]d|. You'd have to use
  |[{a[bc]d}]| to get it correct.

  Inside the \meta{action} you can perform almost any kind of code. This code
  will not be surrounded by a scope, so its effect persists after the parsing
  is done. However, each time after the \meta{action} is executed, control
  goes back to the parser. You should not launch a parser inside the
  \meta{action} code, unless you put it in a scope.

  When you use |all| as the \meta{state}, the \meta{action} is performed
  in all states as a fallback, whenever \meta{symbol meaning} is encountered.
  This means that when you do not specify anything explicitly for a state and a
  letter, but you do specify something for |all| and this letter, then the
  specified \meta{action} will be used.

  When the parser encounters a letter for which nothing is specified in the
  current state (neither directly nor indirectly via |all|), an error occurs.
  Additionally you can specify an action that is executed after the error is
  thrown using |\pgfparserdefunknown|. To suppress these errors (but not the
  action specified with |\pgfparserdefunknown|) you can use the
  |/pgfparser/silent| key or the |silent| key of the current \meta{parser name}.
\end{command}%

\begin{command}%
  {%
    \pgfparserlet
    \marg{parser name 1}\marg{state 1}\meta{symbol meaning 1}%
    \oarg{opt 1}\oarg{opt 2}\meta{symbol meaning 2}%
  }%
  In the following explanation, if none of the optional arguments are given,
  \meta{parser name 2} and \meta{state 2} are the same as
  \meta{parser name 1} and \meta{state 1}. If only the first is given,
  \meta{state 2} equals \meta{opt 1}. If both are given, \meta{parser name 2}
  equals \meta{opt 1} and \meta{state 2} equals \meta{opt 2}.

  Defines an action for \meta{parser name 1} in \meta{state 1} for the
  \meta{symbol meaning 1} to do the same as the action of \meta{parser name 2}
  in \meta{state 2} for the \meta{symbol meaning 2}. For \meta{symbol meaning 1}
  and \meta{symbol meaning 2} the same parsing rules apply as for \meta{symbol
  meaning} in |\pgfparserdef| so you either give the meaning in braces or just
  the symbol.
\end{command}%

\begin{command}{\pgfparserdefunknown\marg{parser name}\marg{state}\marg{action}}%
  With this macro you can define an \meta{action} for the \meta{parser name}
  parser in \meta{state} if no action was defined for the letter which was
  encountered.
\end{command}%

\begin{command}{\pgfparserdeffinal\marg{parser name}\marg{action}}%
  Every parser can call a final \meta{action} after the state was switched to
  |final|. This \meta{action} is executed after everything else, so you can use
  something that grabs more arguments if you want to.
\end{command}%

\begin{command}{\pgfparserswitch\marg{state}}%
  This command can be called inside the action code of a parser to cause a
  state switch to \meta{state}.
\end{command}%

\begin{command}{\pgfparserifmark\marg{arg}\marg{true}\marg{false}}%
  Remember that some of the optional argument types set special marks? With
  |\pgfparserifmark| you can test whether \meta{arg} is such a mark. So if
  there was no optional argument for the argument types |o| and |d| the
  \meta{true} branch will be executed, else the \meta{false} branch. For the |t|
  type argument the \meta{true} branch is executed if the token was encountered.
\end{command}%

\begin{command}{\pgfparserreinsert}%
  You can use this as the final macro in an action of |\pgfparserdef| or
  |\pgfparserdefunknown|. This has the effect that the contents of
  |\pgfparserletter| will be parsed next. Without any redefinition the result
  will be that the last token will be parsed again. You can change the
  definition of |\pgfparserletter| just before |\pgfparserreinsert| as well to
  parse some specific tokens next.
\end{command}%

\begin{command}{\pgfparserstate}%
  Expands to the current state of the parser.
\end{command}%

\begin{command}{\pgfparsertoken}%
  This is the macro which is let to the following token with |\futurelet|. You
  can use it inside an action code.
\end{command}%

\begin{command}{\pgfparserletter}%
  This macro stores the letter to which |\pgfparsertoken| was let. So if
  you'd use |\pgfparserparse{foo}a| this macro would be defined with
  |\def\pgfparserletter{a}|. This definition is done before any action code is
  executed. There are four special cases: If the next token is of category code
  1, 2, 6, or 10, so with standard category codes the tokens |{|, |}|, |#|, and
  \textvisiblespace\ (a space), it would be treated differently. In those cases
  this macro expands to |\bgroup|, |\egroup|, |##|, and \textvisiblespace\ for
  the categories 1, 2, 6, and 10, respectively.
\end{command}%

\begin{command}{\pgfparserset\marg{key list}}%
  The |pgfparser| module has a few keys you can access through this macro. It
  is just a shortcut for |\pgfset{/pgfparser/.cd,#1}|. The available keys are
  listed in subsection~\ref{sec:parser:keys}.
\end{command}%


\subsection{Keys of the Parser Module}
\label{sec:parser:keys}

\begin{key}{/pgfparser/silent=\meta{boolean} (initially false)}%
  If |true| then no error will be thrown when a letter is parsed for which no
  action is specified, silently ignoring it. This holds true for every parser.
\end{key}%

\begin{key}{/pgfparser/status=\meta{boolean} (initially false)}%
  If |true| every parser prints a status message for every action executed. This
  might help in debugging and understanding what the parser does.
\end{key}%

In addition to those keys, for every \meta{parser name} for which
|\pgfparserdef|, |\pgfparserdefunknown| or |\pgfparserlet| was run at least
once, the following key will be defined:

\begin{key}{/pgfparser/\meta{parser name}/silent=\meta{boolean} (initially false)}%
  If |true| the parser \meta{parser name} will silently ignore undefined
  letters. This is an individual equivalent of |/pgfparser/silent| for each
  defined parser.
\end{key}%


\subsection{Examples}

The following example counts the different letters appearing in a more or less
random string of letters. Every letter is counted only once; this is achieved
by defining a new action for every encountered unknown letter that does
nothing. We can define such a rule without knowing which letter is used, because
|\pgfparsertoken| has the same meaning as that letter.
%
\begin{codeexample}[
    preamble={\usepgfmodule{parser}},
    pre={\newcount\mycount},
]
\mycount=0
% using the shortcut syntax of just placing ; after the state
\pgfparserdef{different letters}{all};{\pgfparserswitch{final}}%
\pgfparserdefunknown{different letters}{all}%
  {\pgfparserdef{different letters}{all}\pgfparsertoken{}\advance\mycount1}%
\pgfparserdeffinal{different letters}%
  {\the\mycount\ different letters found}%
% don't throw errors for unknown letters
\pgfparserset{different letters/silent=true}%

\pgfparserparse{different letters}udiaternxqlchudiea;
\end{codeexample}%

Next we want to try something that uses some of the different argument types
available.
%
\begin{codeexample}[preamble={\usepgfmodule{parser}}]
% using the long syntax of \pgfparserdef
\pgfparserdef{arguments}{initial}{the letter a}[d()]
  {\pgfparserifmark{#1}{\textcolor{red}{\textit{use}}}{\textbf{#1}} }%
% using the shortcut syntax
\pgfparserdef{arguments}{initial}t[m]{\texttt{#1} }%
\pgfparserdef{arguments}{initial}c[t*O{blue}m]
  {\pgfparserifmark{#1}{#3}{\textcolor{#2}{#3}}}%
\pgfparserdef{arguments}{all};{\pgfparserswitch{final}}%

\pgfparserparse{arguments}t{nobody}a(will)ac[green]{P}c*{arse}c{r};
\end{codeexample}%
